The methPLIER User Guide

Ken Takasawa and Ryuji Hamamoto

2022-06-03

1 Introduction

1.1 What the methPLIER can do

2 Installation

To install this package, start R (>= 3.5.0) and enter:

if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")

if (!require("methPLIER", quietly = TRUE)) install.packages("methPLIER")
library(methPLIER)

3 Dataset

data(gse39279.data, package = "methPLIER")
data(gse39279.annot, package = "methPLIER")
data(methPLIER, package = "methPLIER")

3.1 Data overview

gse39279.data[1:5, 1:5]
#>            GSM959318 GSM959328 GSM959330 GSM959332 GSM959334
#> cg00000029 0.5039425 0.3595960 0.1498145 0.2554182 0.2105006
#> cg00000108 0.8833132 0.9030418 0.9173808 0.9079961 0.9045386
#> cg00000109 0.7518248 0.8068435 0.8288920 0.8419195 0.7703245
#> cg00000165 0.4170720 0.2697226 0.5577269 0.3037358 0.7619721
#> cg00000236 0.7836071 0.7821850 0.8232099 0.8192344 0.8179069
gse39279.annot %>%
    head()
#> # A tibble: 6 × 14
#>   AccessionNo SampleName Category     CUREP_Code Stage Smoking Age   Sex   TNM  
#>   <chr>       <chr>      <chr>        <chr>      <chr> <chr>   <chr> <chr> <chr>
#> 1 GSM959318   Tumor 10   adenocarcin… CUREP1-127 I     yes     49    male  T1N0…
#> 2 GSM959328   Tumor 50   adenocarcin… CUREP1-297 I     yes     62    fema… T1N0…
#> 3 GSM959330   Tumor 54   adenocarcin… CUREP1-301 I     yes     68    fema… T2N0…
#> 4 GSM959332   Tumor 49   adenocarcin… CUREP1-296 I     yes     57    fema… T1N0…
#> 5 GSM959334   Tumor 53   adenocarcin… CUREP1-300 I     no      61    fema… T2N0…
#> 6 GSM959336   Tumor 47   adenocarcin… CUREP1-293 I     yes     63    fema… T1N0…
#> # … with 5 more variables: TimeRec <dbl>, TNM_6th <chr>, TNM_7th <chr>,
#> #   TumorSize <dbl>, Recurrence <chr>

4 Preparation for applying methPLIER function

4.1 Get D matrix

D <- getDmatrix(gse39279.data)
D[1:3, 1:5]
#>       GSM959318 GSM959328 GSM959330 GSM959332 GSM959334
#> A1BG  0.6494772 0.6463945 0.5993907 0.6373738 0.6335066
#> A2LD1 0.3252989 0.3416055 0.3733506 0.3348877 0.4208066
#> ABCD4 0.4156986 0.4147418 0.4158108 0.4321149 0.4175707

4.2 Get B matrix

B <- getNewDataB(D, methPLIER)
B[1:3, 1:5]
#>                                    GSM959318   GSM959328   GSM959330
#> 1,REACTOME_METABOLISM_OF_PROTEINS -0.8490864 -0.03498642  0.52888920
#> 2,REACTOME_METABOLISM_OF_PROTEINS  3.3874754 -0.27206180 -0.50793195
#> 3,REACTOME_METABOLISM_OF_PROTEINS -1.5019111  0.18072186  0.09608475
#>                                       GSM959332   GSM959334
#> 1,REACTOME_METABOLISM_OF_PROTEINS -0.0009594372  0.22933095
#> 2,REACTOME_METABOLISM_OF_PROTEINS  0.9829585704 -0.61498552
#> 3,REACTOME_METABOLISM_OF_PROTEINS -0.6480075817  0.02028482

5 Unsupervised classification of B matrix

5.1 Classify and plot Heatmap

ph <- plotHeatmap(B, k = 2, k.lv = 2)

ph$cluster.sample
#> # A tibble: 155 × 3
#>    cluster.sample rowid.sample AccessionNo
#>             <int>        <int> <chr>      
#>  1              1            1 GSM959318  
#>  2              2            2 GSM959328  
#>  3              2            3 GSM959330  
#>  4              1            4 GSM959332  
#>  5              2            5 GSM959334  
#>  6              2            6 GSM959336  
#>  7              1            7 GSM959339  
#>  8              2            8 GSM959344  
#>  9              2            9 GSM959345  
#> 10              2           10 GSM959346  
#> # … with 145 more rows
ph$cluster.LV
#> # A tibble: 524 × 3
#>       LV `1`        `2`        
#>    <int> <list>     <list>     
#>  1     1 <dbl [20]> <dbl [135]>
#>  2     2 <dbl [20]> <dbl [135]>
#>  3     3 <dbl [20]> <dbl [135]>
#>  4     4 <dbl [20]> <dbl [135]>
#>  5     5 <dbl [20]> <dbl [135]>
#>  6     6 <dbl [20]> <dbl [135]>
#>  7     7 <dbl [20]> <dbl [135]>
#>  8     8 <dbl [20]> <dbl [135]>
#>  9     9 <dbl [20]> <dbl [135]>
#> 10    10 <dbl [20]> <dbl [135]>
#> # … with 514 more rows

6 Plot survival plot with the result of clustering

cl <- ph$cluster.sample %>%
    inner_join(gse39279.annot) %>%
    mutate(cluster = paste0("cluster_", cluster.sample), Event = 1) %>%
    dplyr::rename(Time = TimeRec) %>%
    dplyr::select(cluster, AccessionNo, Time, Event)
plotSurvival(cl)

7 Get Top LVs

topLV <- getTopLVs(ph$cluster.LV, "1", "2")
topLV %>%
    arrange(q.value)
#> # A tibble: 357 × 5
#>       LV `1`        `2`          p.value      q.value
#>    <int> <list>     <list>         <dbl>        <dbl>
#>  1   386 <dbl [20]> <dbl [135]> 2.79e-10 0.0000000730
#>  2   524 <dbl [20]> <dbl [135]> 1.81e-10 0.0000000730
#>  3    60 <dbl [20]> <dbl [135]> 6.08e-10 0.0000000848
#>  4    75 <dbl [20]> <dbl [135]> 8.09e-10 0.0000000848
#>  5   256 <dbl [20]> <dbl [135]> 6.59e-10 0.0000000848
#>  6   271 <dbl [20]> <dbl [135]> 9.72e-10 0.0000000849
#>  7    98 <dbl [20]> <dbl [135]> 1.80e- 9 0.000000118 
#>  8   412 <dbl [20]> <dbl [135]> 1.75e- 9 0.000000118 
#>  9    46 <dbl [20]> <dbl [135]> 3.02e- 9 0.000000131 
#> 10    59 <dbl [20]> <dbl [135]> 3.30e- 9 0.000000131 
#> # … with 347 more rows

8 Plot box plot of top 10 LVs

library(patchwork)
topLV %>%
    top_n(10, -q.value) %>%
    pull(LV) %>%
    map(~plotBoxplot(B, .x, ph$cluster.sample)) %>%
    purrr::reduce(`+`) + plot_layout(ncol = 5, guides = "collect")

9 Get Differentially Methylated Genes (DMGs) of top LVs

lv <- 386
dmg <- getDMGsInLV(methPLIER, D, lv, ph$cluster.sample)
dmg %>%
    dim()
#> [1] 2775    6
dmg %>%
    head()
#> # A tibble: 6 × 6
#>   Gene  id.gene `1`        `2`               p.value     q.value
#>   <chr>   <int> <list>     <list>              <dbl>       <dbl>
#> 1 AASDH    4026 <dbl [20]> <dbl [135]> 0.000243      0.000689   
#> 2 ABCA5    3225 <dbl [20]> <dbl [135]> 0.000426      0.00115    
#> 3 ABCA7    1679 <dbl [20]> <dbl [135]> 0.00000106    0.00000527 
#> 4 ABCB8    1666 <dbl [20]> <dbl [135]> 0.00000000744 0.000000163
#> 5 ABCB9    4291 <dbl [20]> <dbl [135]> 0.000212      0.000609   
#> 6 ABCC3     323 <dbl [20]> <dbl [135]> 0.00268       0.00583

10 Get Differentially Methylated Probes (DMPs) of DMGs

dmp <- getDMPs(gse39279.data, ph$cluster.sample, base::unique(dmg$Gene), threshold = 0.05,
    method = "fdr")
dmp %>%
    head()
#> # A tibble: 6 × 10
#>   TargetID   `1`        `2`   p.value q.value genesUniq CpG_chrm CpG_beg CpG_end
#>   <chr>      <list>     <lis>   <dbl>   <dbl> <chr>     <chr>      <dbl>   <dbl>
#> 1 cg00000029 <dbl [20]> <dbl> 3.79e-4 1.10e-3 RBL2      chr16     5.34e7  5.34e7
#> 2 cg00000714 <dbl [20]> <dbl> 4.12e-4 1.18e-3 TSEN34    chr19     5.42e7  5.42e7
#> 3 cg00000924 <dbl [20]> <dbl> 5.79e-3 1.25e-2 KCNQ1     chr11     2.70e6  2.70e6
#> 4 cg00001582 <dbl [20]> <dbl> 1.36e-8 5.77e-7 ZMIZ1     chr10     7.91e7  7.91e7
#> 5 cg00003091 <dbl [20]> <dbl> 3.15e-5 1.23e-4 SLBP      chr4      1.71e6  1.71e6
#> 6 cg00003091 <dbl [20]> <dbl> 3.15e-5 1.23e-4 TACC3     chr4      1.71e6  1.71e6
#> # … with 1 more variable: probe_strand <chr>

11 Get Pathway of DMGs

pathway <- getPathway(dmg %>%
    distinct(Gene) %>%
    pull(Gene))

11.1 Bar plot

pathway$barplot

11.2 Dot plot

pathway$dotplot

11.3 Tree plot

pathway$treeplot

12 Gviz plot

12.1 Make Gviz object

# Get gene list contained in the ontology 'Metastatic malignant neoplasm to
# brain'
gene.list <- pathway$edox2@result %>%
    top_n(1, -(qvalue)) %>%
    pull(geneID) %>%
    str_split(., "\\/") %>%
    unlist()
gz <- gene.list %>%
    map(~.x %>%
        makeGvizObj(., gse39279.data))
names(gz) <- gene.list

12.2 Plot

# Plot ERBB2 as example
gz$ERBB2 %>%
    plotGvizObj(.)


# If you want to plot all of the listed gene, execute below script.
# dir.create('figures/gviz_1/', recursive = TRUE) gz %>% imap(~ plotGvizObj(.x,
# filename = .y, dir = 'figures/gviz_1'))

12.3 Plot Gviz with boxplot

plotBoxplot.gviz(gz$ERBB2, as.factor(ph$cluster.sample$cluster.sample), gz$ERBB2$probe$TargetID[1:5])

13 Session Information

sessionInfo()
#> R version 4.1.1 (2021-08-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Catalina 10.15.7
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] patchwork_1.1.1     survival_3.3-1      methPLIER_0.1.0    
#>  [4] PLIER_0.99.0        qvalue_2.26.0       rsvd_1.0.5         
#>  [7] glmnet_4.1-4        Matrix_1.4-1        pheatmap_1.0.12    
#> [10] gplots_3.1.3        RColorBrewer_1.1-3  BiocManager_1.30.18
#> [13] magrittr_2.0.3      forcats_0.5.1       stringr_1.4.0      
#> [16] dplyr_1.0.9         purrr_0.3.4         readr_2.1.2        
#> [19] tidyr_1.2.0         tibble_3.1.7        ggplot2_3.3.6      
#> [22] tidyverse_1.3.1     rmdformats_1.0.4    knitr_1.39         
#> 
#> loaded via a namespace (and not attached):
#>   [1] rappdirs_0.3.3              rtracklayer_1.54.0         
#>   [3] bit64_4.0.5                 DelayedArray_0.20.0        
#>   [5] data.table_1.14.2           rpart_4.1.16               
#>   [7] AnnotationFilter_1.18.0     KEGGREST_1.34.0            
#>   [9] RCurl_1.98-1.6              doParallel_1.0.17          
#>  [11] generics_0.1.2              GenomicFeatures_1.46.5     
#>  [13] BiocGenerics_0.40.0         RSQLite_2.2.14             
#>  [15] shadowtext_0.1.2            bit_4.0.4                  
#>  [17] tzdb_0.3.0                  enrichplot_1.14.2          
#>  [19] xml2_1.3.3                  lubridate_1.8.0            
#>  [21] SummarizedExperiment_1.24.0 assertthat_0.2.1           
#>  [23] viridis_0.6.2               xfun_0.31                  
#>  [25] hms_1.1.1                   jquerylib_0.1.4            
#>  [27] evaluate_0.15               fansi_1.0.3                
#>  [29] restfulr_0.0.13             progress_1.2.2             
#>  [31] caTools_1.18.2              dbplyr_2.1.1               
#>  [33] readxl_1.4.0                km.ci_0.5-6                
#>  [35] igraph_1.3.1                DBI_1.1.2                  
#>  [37] htmlwidgets_1.5.4           stats4_4.1.1               
#>  [39] ellipsis_0.3.2              ggnewscale_0.4.7           
#>  [41] ggpubr_0.4.0                backports_1.4.1            
#>  [43] bookdown_0.26               biomaRt_2.50.3             
#>  [45] MatrixGenerics_1.6.0        vctrs_0.4.1                
#>  [47] Biobase_2.54.0              ensembldb_2.18.4           
#>  [49] abind_1.4-5                 cachem_1.0.6               
#>  [51] withr_2.5.0                 ggforce_0.3.3              
#>  [53] Gviz_1.38.4                 BSgenome_1.62.0            
#>  [55] checkmate_2.1.0             GenomicAlignments_1.30.0   
#>  [57] treeio_1.18.1               prettyunits_1.1.1          
#>  [59] cluster_2.1.3               DOSE_3.20.1                
#>  [61] ape_5.6-2                   lazyeval_0.2.2             
#>  [63] crayon_1.5.1                pkgconfig_2.0.3            
#>  [65] labeling_0.4.2              tweenr_1.0.2               
#>  [67] GenomeInfoDb_1.30.1         ProtGenerics_1.26.0        
#>  [69] nlme_3.1-157                nnet_7.3-17                
#>  [71] rlang_1.0.2                 lifecycle_1.0.1            
#>  [73] filelock_1.0.2              BiocFileCache_2.2.1        
#>  [75] modelr_0.1.8                dichromat_2.0-0.1          
#>  [77] cellranger_1.1.0            polyclip_1.10-0            
#>  [79] matrixStats_0.62.0          aplot_0.1.4                
#>  [81] KMsurv_0.1-5                carData_3.0-5              
#>  [83] zoo_1.8-10                  reprex_2.0.1               
#>  [85] base64enc_0.1-3             GlobalOptions_0.1.2        
#>  [87] png_0.1-7                   viridisLite_0.4.0          
#>  [89] rjson_0.2.21                bitops_1.0-7               
#>  [91] KernSmooth_2.23-20          Biostrings_2.62.0          
#>  [93] blob_1.2.3                  shape_1.4.6                
#>  [95] jpeg_0.1-9                  rstatix_0.7.0              
#>  [97] gridGraphics_0.5-1          S4Vectors_0.32.4           
#>  [99] ggsignif_0.6.3              scales_1.2.0               
#> [101] memoise_2.0.1               plyr_1.8.7                 
#> [103] zlibbioc_1.40.0             compiler_4.1.1             
#> [105] scatterpie_0.1.7            BiocIO_1.4.0               
#> [107] clue_0.3-60                 Rsamtools_2.10.0           
#> [109] cli_3.3.0                   XVector_0.34.0             
#> [111] htmlTable_2.4.0             formatR_1.12               
#> [113] Formula_1.2-4               MASS_7.3-57                
#> [115] tidyselect_1.1.2            stringi_1.7.6              
#> [117] highr_0.9                   yaml_2.3.5                 
#> [119] GOSemSim_2.20.0             latticeExtra_0.6-29        
#> [121] ggrepel_0.9.1               survMisc_0.5.6             
#> [123] grid_4.1.1                  VariantAnnotation_1.40.0   
#> [125] sass_0.4.1                  fastmatch_1.1-3            
#> [127] tools_4.1.1                 parallel_4.1.1             
#> [129] circlize_0.4.15             rstudioapi_0.13            
#> [131] foreach_1.5.2               foreign_0.8-82             
#> [133] gridExtra_2.3               farver_2.1.0               
#> [135] ggraph_2.0.5                digest_0.6.29              
#> [137] ggtext_0.1.1                Rcpp_1.0.8.3               
#> [139] gridtext_0.1.4              GenomicRanges_1.46.1       
#> [141] car_3.0-13                  broom_0.8.0                
#> [143] httr_1.4.3                  survminer_0.4.9            
#> [145] AnnotationDbi_1.56.2        biovizBase_1.42.0          
#> [147] ComplexHeatmap_2.10.0       colorspace_2.0-3           
#> [149] rvest_1.0.2                 XML_3.99-0.9               
#> [151] fs_1.5.2                    IRanges_2.28.0             
#> [153] splines_4.1.1               yulab.utils_0.0.4          
#> [155] tidytree_0.3.9              graphlayouts_0.8.0         
#> [157] ggplotify_0.1.0             xtable_1.8-4               
#> [159] jsonlite_1.8.0              ggtree_3.2.1               
#> [161] tidygraph_1.2.1             ggfun_0.0.6                
#> [163] R6_2.5.1                    Hmisc_4.7-0                
#> [165] pillar_1.7.0                htmltools_0.5.2            
#> [167] glue_1.6.2                  fastmap_1.1.0              
#> [169] BiocParallel_1.28.3         codetools_0.2-18           
#> [171] fgsea_1.20.0                utf8_1.2.2                 
#> [173] lattice_0.20-45             bslib_0.3.1.9000           
#> [175] curl_4.3.2                  gtools_3.9.2.1             
#> [177] GO.db_3.14.0                rmarkdown_2.14             
#> [179] munsell_0.5.0               DO.db_2.9                  
#> [181] GetoptLong_1.0.5            GenomeInfoDbData_1.2.7     
#> [183] iterators_1.0.14            haven_2.5.0                
#> [185] reshape2_1.4.4              gtable_0.3.0

14 Reference